﻿using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;

namespace TextToVoice
{
    /// <summary>
    /// Splits a plain-text file into chapters by detecting chapter-title lines
    /// with caller-supplied regular expressions.
    /// </summary>
    public class ChapterCut
    {
        /// <summary>
        /// Exclusive upper bound on the character length of a line that may be
        /// treated as a chapter title.
        /// </summary>
        private const int MaxTitleLength = 50;

        /// <summary>
        /// Reads <paramref name="filePath"/> line by line and groups the lines into chapters.
        /// A line accepted by <see cref="isTitleLine"/> starts a new chapter; every following
        /// non-title line is appended to that chapter's content. Lines that appear before the
        /// first title line are discarded.
        /// </summary>
        /// <param name="regxStrTitle">Regex pattern that a chapter-title line must match.</param>
        /// <param name="regxStrSymbol">Regex pattern (typically punctuation) that a title line must NOT match.</param>
        /// <param name="filePath">Path of the text file to split. It is decoded with <see cref="Encoding.Default"/>.</param>
        /// <returns>
        /// The chapters in file order, numbered from 1. The list is empty when no title line is found.
        /// </returns>
        public List<InnerChapter> getChapters(string regxStrTitle, string regxStrSymbol, string filePath)
        {
            // NOTE(review): Encoding.Default is platform/runtime dependent; kept as-is because
            // callers may rely on it for legacy (non-UTF-8) text files.
            string[] lines = File.ReadAllLines(filePath, Encoding.Default);
            List<InnerChapter> chapters = new List<InnerChapter>();
            InnerChapter chapter = new InnerChapter();
            int chapterNo = 1;

            foreach (string tmpLine in lines)
            {
                if (this.isTitleLine(regxStrTitle, regxStrSymbol, tmpLine))
                {
                    // Reaching a new title means the chapter collected so far is complete.
                    if (!string.IsNullOrEmpty(chapter.title))
                    {
                        chapters.Add(chapter);
                        chapter = new InnerChapter();
                    }

                    chapter.title = tmpLine;
                    chapter.no = chapterNo++;
                }
                else if (!string.IsNullOrEmpty(chapter.title))
                {
                    // Content is only collected once a title has been seen.
                    // Lines are concatenated without any separator (unchanged behavior).
                    chapter.content.Append(tmpLine);
                }
            }

            // The loop only flushes a chapter when the NEXT title is encountered, so the
            // final chapter of the file has to be added here; otherwise it would be lost.
            if (!string.IsNullOrEmpty(chapter.title))
            {
                chapters.Add(chapter);
            }

            return chapters;
        }

        /// <summary>
        /// Determines whether a line is a chapter-title line. A line qualifies when it matches
        /// the title pattern, is shorter than 50 characters, and does not match the symbol pattern.
        /// </summary>
        /// <param name="regxStrTitle">Regex pattern containing the chapter-title keywords.</param>
        /// <param name="regxStrSymbol">Regex pattern of symbols that disqualify a line, e.g. "[,.，。；;!]".</param>
        /// <param name="lineText">The line of text to test.</param>
        /// <returns><c>true</c> if the line is considered a chapter title; otherwise <c>false</c>.</returns>
        public bool isTitleLine(string regxStrTitle, string regxStrSymbol, string lineText)
        {
            return Regex.IsMatch(lineText, regxStrTitle)
                && lineText.Length < MaxTitleLength
                && !Regex.IsMatch(lineText, regxStrSymbol);
        }
    }

  
}
